Observed log-chlorophyll at representative station in SF Bay Delta region.

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)

# flow data, left moving window average of 30 days
data(flow_dat)
fl_dat <- flow_dat %>% 
  filter(station %in% 'sjr') %>% 
  mutate(
    qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
  )
  
# format the data to model
data(sf_dat)
sf_sub <- sf_dat %>% 
  filter(Site_Code %in% 'P8') %>% 
  mutate(
    doy = yday(Date), 
    decyr = decimal_date(Date), 
    yr = year(Date),
    mo = month(Date, label = T)
  ) %>% 
  left_join(fl_dat, by = 'Date') %>% 
  mutate(
    lnQ = log(qsm), 
    lnchl = log(chl)
    ) %>% 
  select(-q, -qsm, -station, -Latitude, -Longitude, -Location)

# plot, all
p <- ggplot(sf_sub, aes(x = Date, y = lnchl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sf_sub, aes(x = yr, y = lnchl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sf_sub, aes(x = mo, y = lnchl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Some simple GAMs to explore annual, seasonal trends.

# annual only
mod1 <- gam(lnchl ~ s(decyr, bs = 'tp'),
  data = sf_sub, 
  na.action = na.exclude
  )

# seasonal only
mod2 <- gam(lnchl ~ s(doy, bs = 'cc'), 
  knots = list(doy = c(1, 366)),
  data = sf_sub, 
  na.action = na.exclude
  )
 
# annual and seasonal, additive
mod3 <- gam(lnchl ~ s(decyr, bs = 'tp') + 
  s(doy, bs = 'cc'), 
  knots = list(doy = c(1, 366)),
  data = sf_sub, 
  na.action = na.exclude
  )

# annual and seasonal, interaction
mod4 <- gam(lnchl ~ te(decyr, doy, bs = c('tp', 'cc')),
  knots = list(doy = c(1, 366)),
  data = sf_sub, 
  na.action = na.exclude
  )

Summary stats of annual, seasonal models:

mods <- list(
  mod1 = mod1,
  mod2 = mod2, 
  mod3 = mod3,
  mod4 = mod4
  )

# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
mod1 s(decyr) 8.212548 8.830184 32.55938 0
mod2 s(doy) 7.083832 8.000000 16.06665 0
mod3 s(decyr) 8.394141 8.896837 41.39759 0
mod3 s(doy) 7.393221 8.000000 24.13289 0
mod4 te(decyr,doy) 16.859549 18.456279 25.60994 0
# summary stats of GAMs
map(mods, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
mod1 1188.287 0.3406925
mod2 1304.711 0.1845578
mod3 1030.524 0.5109149
mod4 1083.738 0.4625216
# prediction data
pred_dat <- data.frame(
    decyr = seq(min(sf_sub$decyr), max(sf_sub$decyr), length = 1000)
  ) %>% 
  mutate(
    Date = date_decimal(decyr), 
    Date = as.Date(Date),
    mo = month(Date, label = TRUE), 
    doy = yday(Date), 
    yr = year(Date)
  )

# predictions
sf_res <- pred_dat %>% 
  mutate(
    mod1 = predict(mod1, newdata = pred_dat), 
    mod2 = predict(mod2, newdata = pred_dat), 
    mod3 = predict(mod3, newdata = pred_dat), 
    mod4 = predict(mod4, newdata = pred_dat)
  ) %>% 
  tidyr::gather('mods', 'pred', -Date, -mo, -doy, -decyr, -yr)

# plot
p <- ggplot(sf_res, aes(x = Date)) + 
  geom_point(data = sf_sub, aes(y = lnchl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)
# plot
p <- ggplot(sf_res, aes(x = doy, group = factor(yr), colour = yr)) + 
  geom_line(aes(y = pred)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    ) + 
  facet_wrap(~ mods, ncol = 2)
ggplotly(p)

Adding flow data to the model:

# model using a tensor product smooth for interactions
modall <- gam(lnchl ~ te(decyr, doy, lnQ, bs = c('tp', 'cc', 'tp')),
  knots = list(doy = c(1, 366)),
  data = sf_sub,
  na.action = na.exclude
  )